*********************************************************************************************************
********************* CREATE FILE WITH ELECTED BY YEAR - DISTRICT - PARTY: to create rankstar ***********
*********************************************************************************************************
* Build a helper file with the number of elected candidates per district and
* party, stored wide with one seatsYYYY variable per election year. It is
* merged back onto the candidate-level data later in this do-file.
* Paths use forward slashes, which Stata accepts on every operating system
* (backslashes only work on Windows); this is also the separator used when
* dta/DataPrep is saved.
use "dta/DataSet_v28.dta", clear
drop if year < 1950 /* new districts from 1953 */

** Keep only main parties
keep if inlist(party, "sv", "dna", "sp", "v", "krf", "h", "frp", "h_fv")

* Sum the elected indicator within each year x district x party cell.
collapse (sum) elected, by(year districtid party)

rename elected seats

* One row per district-party; a year with no row for that district-party
* shows up as a missing seatsYYYY value.
reshape wide seats, i(districtid party) j(year)
save "dta/SeatsByYearDistrictParty.dta", replace

*********************************************************************************************************
************************* IMPORT MAIN DATA AND MERGE ****************************************************
*********************************************************************************************************
* Load the candidate-level data, attach the wide seats-by-year file on
* district and party, and compute rankstar.
* Paths use forward slashes, which Stata accepts on every operating system
* (backslashes only work on Windows).
use "dta/DataSet_v28.dta", clear

** Keep only main parties
keep if inlist(party, "sv", "dna", "sp", "v", "krf", "h", "frp", "h_fv")

drop *pact* /* don't need these variables */
drop if year < 1945

sort districtid party
merge m:1 districtid party using "dta/SeatsByYearDistrictParty.dta"

* Seat counts that are missing after the merge (no row for that
* district-party in that year, or no match at all) are set to zero.
forvalues yr = 1953(4)2021 {
    replace seats`yr' = 0 if seats`yr' == .
}

* rankstar = seats recorded for the same district-party four years earlier
* minus the candidate's rank. It is filled for election years 1957-2021 and
* stays missing for every other year. The year conditions are mutually
* exclusive, so the order of the loop does not matter.
gen rankstar = .
forvalues yr = 1957(4)2021 {
    local prevyr = `yr' - 4
    replace rankstar = seats`prevyr' - rank if year == `yr'
}

*********************************************************************************
******************************FIXING DUPLICATES *********************************
*********************************************************************************
* Goal of this section: end up with exactly one observation per candidate
* (pid) and election year.

* Step 1: within each pid-year keep only the entry with the lowest rank
* number, i.e. the candidate's highest list position.
bysort pid year: egen min_rank = min(rank)
drop if rank != min_rank

* Step 2: Venstre ran with identical lists in Vest-Agder and Aust-Agder in
* 2013. Venstre were 8.5pp away from winning a regular seat in Vest-Agder and
* 13.1pp away in Aust-Agder, so the Vest-Agder entries are kept. The same
* rule is applied in 2017.
drop if party == "v" & inlist(year, 2013, 2017) & district == "aust-agder"

* Step 3: hand-picked candidates who run in two districts.
* NOTE(review): none of these conditions restricts the year, so the named
* candidate is removed from the named district in every year - confirm that
* this is intended.
* Anders Lange was ranked first in both Oslo and Vest-Agder, but performed
* best in Oslo.
drop if candidatename_ed == "Anders Lange" & district == "vest-agder"
* Svend Haakon Jacobsen and Kristin Dalehamn each run in both districts as
* hopeless candidates with the same rank; one entry is excluded arbitrarily.
drop if inlist(candidatename_ed, "Svend Haakon Jacobsen", "Kristin Dalehamn") & district == "rogaland"
* Steinar Bastesen runs in two districts in 2001 and wins in Nordland.
drop if candidatename_ed == "Steinar Bastesen" & district == "nord-trøndelag"

* Every candidate present at this point is running; the flag is created
* before the reshaping further down.
gen running = 1

* Step 4: count the observations left per pid-year, tabulate the remaining
* duplicates by party and year, then tentatively drop them (lists running in
* multiple districts).
bysort pid year: gen count = _N
tab party year if count != 1
keep if count == 1

* pid and year now uniquely identify observations.
assert count == 1

* Keep only the variables used from here on.
keep pid year districtid party rank rankstar candidatename_ed elected deputy running female

********************************************************************************************************************
******************* RESHAPING TO GET A BALANCED PANEL SO EVERY CANDIDATE IS IN EVERY ELECTION YEAR *****************
********************************************************************************************************************
* Balance the panel: reshaping wide and straight back to long gives every pid
* one row for every year value present in the data. Rows created this way
* carry missing values in all reshaped variables.
local panelvars districtid party rank rankstar candidatename_ed elected running female deputy

summarize
reshape wide `panelvars', i(pid) j(year)
reshape long `panelvars', i(pid) j(year)
summarize

* Rows created by the reshape: not elected, not running, and a placeholder
* rank of 99.
foreach v in elected running {
    replace `v' = 0 if `v' == .
}
replace rank = 99 if rank == .

* Leads and lags within candidate. After sorting by pid and year, X_nextK is
* the value K rows later and X_prevK the value K rows earlier for the same
* pid (K = 0 simply copies X). References that fall outside a candidate's
* rows give missing.
sort pid year
foreach v in elected running rank deputy districtid rankstar {
    forvalues k = 0/3 {
        by pid: gen `v'_next`k' = `v'[_n + `k']
        by pid: gen `v'_prev`k' = `v'[_n - `k']
    }
}

save dta/DataPrep, replace